/*******************************************************************************
Albouy, Ehrlich, Shin
February 2017
This do filC:
	- Merges Albouy Ehrlich Shin Land Price Data with Davis Palumbo Land Price data
	- Compares them 
	
*******************************************************************************/

clear
set more off
set autotabgraphs on
graph drop _all
graph set window fontface "Times New Roman"
graph set ps fontface "Times New Roman"
graph set eps fontface "Times New Roman"
estimates drop _all
pause on

version 13.1

//cd "C:\users\gehrlich\Dropbox\Land Values\REStat\Revision Results\Round 3 Final"

/* First import and format our land value estimates*/

import excel "tab_lv_log2m6_estX_M2.xls", sheet("land_values (MSAs)") firstrow clear
drop LVLand LVtotalLand LVcenter-LV40mile
reshape long LV, i(combofips) j(year)
rename LV LVAES
rename LVUA LVAESavg
rename LVtotalUA AESTotalLandValue
save AESLandValues.dta, replace

/* Now import and format Davis and Palumbo land value estimates*/
import excel "Davis Palumbo Land Prices Panel.xlsx", sheet("landdata-msas-2016q1") firstrow clear // this file contains the entire history of the DP land values

// Generate FIPS codes
gen combofips = 520 if MSA=="ATLANTA"
replace combofips = 720 if MSA=="BALTIMORE"
replace combofips = 1000 if MSA=="BIRMINGHAM"
replace combofips = 1120 if MSA=="BOSTON"
replace combofips = 1280 if MSA=="BUFFALO"
replace combofips = 1520 if MSA=="CHARLOTTE"
replace combofips = 1600 if MSA=="CHICAGO"
replace combofips = 1640 if MSA=="CINCINNATI"
replace combofips = 1680 if MSA=="CLEVELAND"
replace combofips = 1840 if MSA=="COLUMBUS"
replace combofips = 1920 if MSA=="DALLAS"
replace combofips = 2080 if MSA=="DENVER"
replace combofips = 2160 if MSA=="DETROIT"
replace combofips = 2800 if MSA=="FORTWORTH"
replace combofips = 3280 if MSA=="HARTFORD"
replace combofips = 3360 if MSA=="HOUSTON"
replace combofips = 3480 if MSA=="INDIANAPOLIS"
replace combofips = 3760 if MSA=="KANSASCITY"
replace combofips = 4480 if MSA=="LOSANGELES"
replace combofips = 4920 if MSA=="MEMPHIS"
replace combofips = 5000 if MSA=="MIAMI"
replace combofips = 5080 if MSA=="MILWAUKEE"
replace combofips = 5120 if MSA=="MINNEAPOLISSTPAUL"
replace combofips = 5560 if MSA=="NEWORLEANS"
replace combofips = 5600 if MSA=="NEWYORK"
replace combofips = 5720 if MSA=="NORFOLK"
replace combofips = 5775 if MSA=="OAKLAND"
replace combofips = 5880 if MSA=="OKLAHOMACITY"
replace combofips = 6160 if MSA=="PHILADELPHIA"
replace combofips = 6200 if MSA=="PHOENIX"
replace combofips = 6280 if MSA=="PITTSBURGH"
replace combofips = 6440 if MSA=="PORTLAND"
replace combofips = 6480 if MSA=="PROVIDENCE"
replace combofips = 6840 if MSA=="ROCHESTER"
replace combofips = 6920 if MSA=="SACRAMENTO"
replace combofips = 7160 if MSA=="SALTLAKECITY"
replace combofips = 7240 if MSA=="SANANTONIO"
replace combofips = 6780 if MSA=="SANBERNADINO" // this one is out of order because the official name is Riverside-San Bernardino, CA
replace combofips = 7320 if MSA=="SANDIEGO"
replace combofips = 7360 if MSA=="SANFRANCISCO"
replace combofips = 7400 if MSA=="SANJOSE"
replace combofips = 5945 if MSA=="SANTAANA" // this one is out of order because the official name is Orange County, CA
replace combofips = 7600 if MSA=="SEATTLE"
replace combofips = 7040 if MSA=="STLOUIS" // this one is out of order because they seem to alphabetize it as "Saint Louis" even though the official name is listed as "St. Louis, MO-IL"
replace combofips = 8280 if MSA=="TAMPA"
replace combofips = 8840 if MSA=="WASHINGTONDC"

// Convert dates into Stata format
gen yearstr = substr(Date,1,4)
gen qtrstr = substr(Date,6,1)
destring yearstr qtrstr, gen(year qtr)
gen yearqtr = yq(year,qtr)

// Take geometric average land values per year and overall from 2005q1-2010q4
sca def startyq = yq(2005,1)
sca def endyq = yq(2010,4)
drop if !inrange(yearqtr,startyq,endyq)
sort combofips year
by combofips year: egen LVDP = mean(ln(LandValue))
by combofips: egen LVDP_avg = mean(ln(LandValue))
replace LVDP = exp(LVDP)
replace LVDP_avg = exp(LVDP_avg)
collapse (mean) LVDP LVDP_avg, by(combofips year)
save DavisPalumboLandValues.dta, replace

/* Now import and format Davis and Palumbo land value estimates (cross-sectional)*/
import excel "Urban Housing Units.xlsx", sheet("Sheet1") firstrow clear
save UrbanHousingUnits.dta, replace

/* Now merge DP LVs with AES LVs */
use AESLandValues.dta, clear
merge 1:1 combofips year using DavisPalumboLandValues.dta
drop if _merge!=3
drop _merge

gen lnLVDP = ln(LVDP)
gen lnLVAES = ln(LVAES)
gen lnLVAESavg = ln(LVAESavg)

gen urbanarea = AESTotalLandValue/LVAESavg
gen totalvalue_AES = LVAES*urbanarea

// LVUA is our cross-sectional estimate of land values, which does not exactly equal the geometric mean of the alphas 
collapse (mean) lnLVDP lnLVAESavg AESTotalLandValue (max) max_lnLVDP=lnLVDP max_lnLVAES=lnLVAES ///
	(min) min_lnLVDP=lnLVDP min_lnLVAES=lnLVAES (sd) LVDPsd=LVDP LVAESsd=LVAES, by(combofips)
gen LVDP_avg = exp(lnLVDP)/1000
gen LVAES_avg = exp(lnLVAES)/1000
gen LVDP_min = exp(min_lnLVDP)/1000
gen LVDP_max = exp(max_lnLVDP)/1000
gen LVAES_min = exp(min_lnLVAES)/1000
gen LVAES_max = exp(max_lnLVAES)/1000
gen peaktrough_DP = 100*(LVDP_max-LVDP_min)/LVDP_max
gen peaktrough_AES = 100*(LVAES_max-LVAES_min)/LVAES_max
sum LVDP_avg
sum LVAES_avg

sum peaktrough_AES
sum peaktrough_DP
gen CV_LVDP = (LVDPsd/1000)/LVDP_avg
gen CV_LVAES = (LVAESsd/1000)/LVAES_avg

merge 1:1 combofips using UrbanHousingUnits.dta, keepusing(UrbanHousingUnits)
drop _merge
gen DPValuePerLot = LVDP_avg
gen DPTotalResidentialLandValue = 1000*LVDP_avg*UrbanHousingUnits

gen smsa = combofips
merge 1:1 smsa using ahs_smsalot_2011.dta
list MSAName if _merge!=3  // we do not merge in lot size info for Orange County, CA, FIPS 5945
drop if _merge!=3

gen msalabel = MSAName
egen msalabel2 = ends(msalabel), punct("-") trim head
gen msalabel3 = ""
replace msalabel3 = "San Francisco" if combofips==7360
replace msalabel3 = "San Jose" if combofips==7400
replace msalabel3 = "New York" if combofips==5600
replace msalabel3 = "Los Angeles" if combofips==4480
replace msalabel3 = "Pittsburgh" if combofips==6280
replace msalabel3 = "Chicago" if combofips==1600
replace msalabel3 = "Rochester" if combofips==6840
replace msalabel3 = "Dallas" if combofips==1920
replace msalabel3 = "Atlanta" if combofips==520
replace msalabel3 = "Detroit" if combofips==2160
replace msalabel3 = "Denver" if combofips==2080
replace msalabel3 = "Hartford" if combofips==3280
replace msalabel3 = "Boston" if combofips==1120
replace msalabel3 = "Miami" if combofips==5000
replace msalabel3 = "Washington" if combofips==8840
replace msalabel3 = "Denver" if combofips==2080
replace msalabel3 = "Philadelphia" if combofips==6160
replace msalabel3 = "Houston" if combofips==3360
replace msalabel3 = "Clevleand" if combofips==1680
replace msalabel3 = "St Louis" if combofips==7040
replace msalabel3 = "Minneapolis" if combofips==5120
replace msalabel3 = "Indianapolis" if combofips==3480
replace msalabel3 = "Portland" if combofips==6440


/***  adjust units and take out one half of rented units **/
replace AESTotalLandValue = AESTotalLandValue/1000000000
replace DPTotalResidentialLandValue = (1 - (1-ownocc)/2)*DPTotalResidentialLandValue/1000000000

gen lnAEStot = ln(AESTotalLandValue)
gen lnDPtot = ln(DPTotalResidentialLandValue)

reg lnDPtot lnAEStot
predict bestfit1, xb
replace bestfit1 = exp(bestfit1)
local slope1: di %5.2f = _b[lnAEStot]
local slope1_se: di %5.2f = _se[lnAEStot]
local int1: di %5.2f = _b[_cons]
local corr1: di %5.2f = sqrt(e(r2))


reg peaktrough_DP peaktrough_AES
predict bestfit2, xb
local slope2: di %5.2f = _b[peaktrough_AES]
local slope2_se: di %5.2f = _se[peaktrough_AES]
local int2: di %5.2f = _b[_cons]
local corr2: di %5.2f = sqrt(e(r2))


sort AESTotalLandValue

local xsize = 7
local ysize = 3

gen mlabposB = 9
replace mlabposB = 3 if combofips==6840 // Rochester

twoway (line AESTotalLandValue AESTotalLandValue, lpat(solid) lcolor(gs2) lwidth(0.2)) ///
	(line bestfit1 AESTotalLandValue, lpat(dash) lcolor(gs2) lwidth(0.2)) ///
	(scatter DPTotalResidentialLandValue AESTotalLandValue, ///
	lwidth(none) graphregion(color(white)) color(gs13) lcolor(black) msymbol(oh) msize(large) mcolor(black) ///
	mlab(msalabel3) mlabcol(black) mlabsize(3) mlabvposition(mlabposB) ///
	graphregion(color(white))), ///
	ytitle("Davis and Palumbo Land Value per Lot" "times Urban Housing Units ($ billions)", size(2.5)) ylabel(,labsize(2.5)) ///
	xtitle("Total Transactions-Based Urban Land Value Estimate ($ billions)", size(2.5)) ///
	xscale(log titlegap(2)) xlabel(64 125 250 500 1000 2000, labsize(2.5) grid) ylabel(4 8 16 32 64 125 250 500 1000 2000, labsize(2.5)) yscale(log titlegap(2)) ///
	legend(order(3 1 2) label(1 "45-degree line") label(2 "Best Fit: `int1' + `slope1' (`slope1_se') * AES Est.") ///
	label(3 "Estimated Values, correlation = `corr1' ") size(small) symxsize(6)) ///
	xsize(`xsize') ysize(`ysize') ///
	name(levels, replace)
graph export "dp_comp_B.pdf", as(pdf) fontface("Times New Roman") replace

sort peaktrough_AES

gen mlabposC = 9
replace mlabposC = 3 if combofips==7360 // San Francisco
replace mlabposC = 3 if combofips==6280 // Pittsburgh
replace mlabposC = 10 if combofips==4480 // Los Angeles
replace mlabposC = 9 if combofips==8840 // Washington

twoway (line peaktrough_AES peaktrough_AES, lpat(solid) lcolor(gs2) lwidth(0.2)) ///
	(line bestfit2 peaktrough_AES, lpat(dash) lcolor(gs2) lwidth(0.2)) ///
	(scatter peaktrough_DP peaktrough_AES, ///
	lwidth(none) graphregion(color(white)) color(gs13) lcolor(black) msymbol(oh) msize(large) mcolor(black)  ///
	mlab(msalabel3) mlabcol(black) mlabsize(3) mlabvposition(mlabposC) ///
	graphregion(color(white))), ///
	ytitle("Davis and Palumbo Within-City Price Variation (%)", size(2.5)) ylabel(,labsize(2.5)) ///
	xtitle("Transactions-Estimated Within-City Price Variation (%)", size(2.5))  xlabel(,grid labsize(2.5))  ///
	legend(order(3 1 2) label(1 "45-degree line") label(2 "Best Fit: `int2' + `slope2' (`slope2_se') * AES Est.") ///
	label(3 "Estimated Values,  correlation = `corr2' ") size(small) symxsize(6)) ///
	xscale(titlegap(2)) yscale(titlegap(2)) ///
	xsize(`xsize') ysize(`ysize') ///
	name(change, replace)
graph export "dp_comp_C.pdf", as(pdf) fontface("Times New Roman") replace

gen LVDP_acre = LVDP_avg/lot_acre
gen lnLVDP_acre = ln(LVDP_acre)
*gen lnLVAESavg = ln(LVAESavg)
reg lnLVDP_acre lnLVAESavg 
predict bestfit3, xb
replace bestfit3 = exp(bestfit3)
local slope3: di %5.2f = _b[lnLVAESavg]
local slope3_se: di %5.2f = _se[lnLVAESavg]
local int3: di %5.2f = _b[_cons]
local corr3: di %5.2f = sqrt(e(r2))

*replace bestfit3=. if bestfit3<20

gen mlabposA = 9

sort LVAES_avg
twoway (line LVAES_avg LVAES_avg, lpat(solid) lcolor(gs2) lwidth(0.2)) ///
	(line bestfit3 LVAES_avg, lpat(dash) lcolor(gs2) lwidth(0.2)) ///
	(scatter LVDP_acre LVAES_avg, ///
	lwidth(none) graphregion(color(white)) mcolor(gs13) msymbol(oh) msize(large) mcolor(black) lcolor(black) ///
	mlab(msalabel3) mlabcol(black) mlabsize(3) mlabvposition(mlabposA) ///
	graphregion(color(white))), ///
	ytitle("Davis and Palumbo Value per Estimated Acre ($000s)", size(2.5)) ylabel(,labsize(2.5)) ///
	xtitle("Transactions-Based Value per Acre ($000s)", size(2.5)) ///
	legend(order(3 1 2) label(1 "45-degree line") label(2 "Best Fit: `int3' + `slope3' (`slope3_se') *  AES Est.") ///
	label(3 "Estimated Values,  correlation = `corr3' ") size(small) symxsize(6)) ///
	xsize(`xsize') ysize(`ysize') ///
	xscale(log titlegap(2)) xlabel(125 250 500 1000 2000 4000, grid labsize(2.5)) ylabel(16 32 64 125 250 500 1000 2000 4000 8000, labsize(2.5)) yscale(log titlegap(2)) ///
	name(peracre, replace)
graph export "dp_comp_A.pdf", as(pdf) fontface("Times New Roman") replace

replace DPValuePerLot = DPValuePerLot
gsort -LVAES_avg
browse combofips MSAName DPValuePerLot LVDP_acre LVAES_avg DPTotalResidentialLandValue AESTotalLandValue ///
	CV_LVDP CV_LVAES // copy and paste into table A3
